* Script setup.
* Positional argument 1 is stored as the project root and config.do beneath it is
* included (the log path below reads ${log_dir}, which is not defined in this file).
global root_dir = "`1'"

include "${root_dir}/code/config/config.do"


capture noisily log using ${log_dir}/df_ovb.log, replace name(dat)

* Positional arguments 2-5 are copied into the globals arg1-arg4.
* The literal placeholder ___EMPTY___ means "not supplied" and is turned into "".
forvalues pos = 2/5 {
    local slot = `pos' - 1
    global arg`slot' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* Each setting is overwritten only when its argument is non-empty;
* otherwise the global keeps whatever value it already had.
if ("${arg1}" != "") {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if ("${arg2}" != "") {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if ("${arg3}" != "") {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if ("${arg4}" != "") {
    global wtype "${arg4}"
}
* The weight type is always echoed, whether or not it was overridden above.
display "${wtype}"
capture noi {

* Run the shared configuration do-files quietly.
* NOTE(review): presumably these define the country-list globals and the
* labelingvars command used further down — confirm against the config files.
quietly do ${code_dir}/config/labeling_finalvars.do
quietly do ${code_dir}/config/country_list.do
quietly do ${code_dir}/config/labeling_indepvars_function.do

* Here, we build additional control variables: namely, offshoring and recent innovations.

************************************
* OFFSHORING Indep var
************************************

* Start from the firm-year list and keep only the identifiers.
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Attach the offshoring data by year; only matched observations are kept.
mmerge year using  ${dataset_dir}/import/offshoring_data.dta, unmatched(none)

* Attach the firm-level 1995 patent weights. Firms without a match stay in the
* data and are flagged in missing_weights_1995.
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
gen missing_weights_1995 = (_merge == 1)
drop _merge

* Put the GDP shares file next to every firm-year row; they serve as fallback weights.
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
sort BvD year

foreach measure in offshoring {
	foreach cc in $countrylist1995 {
		* If the country-level series is absent, create it as missing.
		noisily capture confirm variable `measure'_`cc'
		if _rc {
			gen `measure'_`cc' = .
		}

		* Country weight: patent weight, replaced by the GDP share when it is missing (.).
		gen weight_`cc' = share2_all_1995_`cc'
		replace weight_`cc' = share_GDP_`cc' if weight_`cc' == .

		* Weighted level and weighted log of the series.
		gen `measure'_wtd_`cc' = weight_`cc' * `measure'_`cc'
		gen ln`measure'_wtd_`cc' = weight_`cc' * ln(`measure'_`cc')
	}

	* Row sums across the two-letter country columns (missing if all components are missing).
	egen `measure'_ALL_1995_wtd = rowtotal(`measure'_wtd_??), missing
	egen ln`measure'_ALL_1995_wtd = rowtotal(ln`measure'_wtd_??), missing

	* Home component: the weighted value of the country carrying the largest weight.
	* With tied weights, the country appearing later in the list overwrites the earlier one
	* (the original author notes this never occurred when tested).
	egen maxshare = rowmax(weight_??)
	gen `measure'_shr_home_1995_wtd = .
	foreach cc in $countrylist1995 {
		replace `measure'_shr_home_1995_wtd = `measure'_wtd_`cc' if weight_`cc' == maxshare
	}

	* Foreign component is the remainder; it is undefined when the home weight is exactly 1.
	gen `measure'_shr_foreign_1995_wtd = `measure'_ALL_1995_wtd - `measure'_shr_home_1995_wtd
	replace `measure'_shr_foreign_1995_wtd = . if maxshare == 1

	* Normalised versions: divide by the home weight and by its complement.
	gen `measure'_shr2_home_1995_wtd = `measure'_shr_home_1995_wtd / maxshare
	gen `measure'_shr2_foreign_1995_wtd = `measure'_shr_foreign_1995_wtd / (1 - maxshare)

	* Remove the per-country intermediates.
	drop maxshare *`measure'_wtd_* weight_??
}

* Remove the share inputs, every log-based variable and the raw country series.
drop share* lnoffshoring* offshoring_??

* Apply labels through the project's labelingvars command.
labelingvars "offshoring"

compress
save ${final_dir}/bvd_year_offshoring_sharesgdpweighted${weight_window}_${wtype}.dta, replace

***************************************
* Recent Innovations - Spillover Stocks
***************************************

clear
set maxvar 32767

* Technology-specific country-year invention counts.
* For each technology, the result has one row per authority country and year,
* holding nb_bia: the fractional count of biadic patent families.

foreach tk in auto95 {

	use ${dataset_dir}/patent_list/`tk'_patents.dta, clear

	* Filing authority of each application; where a national-phase record matches,
	* its country replaces the authority.
	mmerge appln_id using ${commondata_dir}/patstat_2018b/appln_info.dta, unmatched(none) ukeep(appln_auth)
	mmerge appln_id using ${commondata_dir}/patstat_2018b/PRS_EPO_national_phase.dta, unmatched(master)
	replace appln_auth = country if _merge == 3
	drop _merge country
	rename appln_auth auth_country

	* Recode authority countries (Germany/GDR and Russia/USSR, per the original comment).
	do ${code_dir}/config/auth_map.do auth_country

	* Family id and the year the family was first filed.
	mmerge appln_id using ${commondata_dir}/patstat_2018b/family_info.dta, unmatched(master) ukeep(docdb_family_id fam_earliest_appln_year)
	drop appln_id _merge

	* Once the application id is gone, identical rows are removed so that a country
	* is not counted repeatedly within a family.
	duplicates drop
	rename fam_earliest_appln_year year

	* 9999 is treated as the missing-year code.
	drop if year == 9999

	* Split each family equally across its remaining rows (one share per row).
	bysort docdb_family_id : gen inv_nb_auth_ctries = 1/_N

	* Biadic flag of the family.
	mmerge docdb_family_id using ${dataset_dir}/patstat_orbis/docdb_families2.dta, unmatched(master) ukeep(biadic_D)
	rename biadic_D bia
	drop _merge

	* Fractional biadic count per authority country and year, then one row per cell.
	bysort auth_country year : egen nb_bia = total(inv_nb_auth_ctries * bia)
	keep year auth_country nb_*
	duplicates drop
	sort auth_country year

	save ${dataset_dir}/spillovers/ctry_auth_count_`tk'.dta, replace
}

* Country-year invention counts over all technologies.
* Same construction as the technology-specific counts, but starting from the
* full application table instead of a technology patent list.
use appln_id appln_auth using ${commondata_dir}/patstat_2018b/appln_info.dta, clear

* Where a national-phase record matches, its country replaces the filing authority.
mmerge appln_id using ${commondata_dir}/patstat_2018b/PRS_EPO_national_phase.dta, unmatched(master)
replace appln_auth = country if _merge == 3
drop _merge country
rename appln_auth auth_country

* Recode authority countries (Germany/GDR and Russia/USSR, per the original comment),
* then attach the family id and the year the family was first filed.
do ${code_dir}/config/auth_map.do auth_country
mmerge appln_id using ${commondata_dir}/patstat_2018b/family_info.dta, unmatched(master) ukeep(docdb_family_id fam_earliest_appln_year)
drop appln_id _merge

* Remove repeated family/country/year rows; 9999 is treated as the missing-year code.
duplicates drop
rename fam_earliest_appln_year year
drop if year == 9999

* Split each family equally across its remaining rows (one share per row).
bysort docdb_family_id : gen inv_nb_auth_ctries = 1/_N

* Biadic flag of the family.
mmerge docdb_family_id using ${dataset_dir}/patstat_orbis/docdb_families2.dta, unmatched(master) ukeep(biadic_D)
rename biadic_D bia
drop _merge

* Fractional biadic count per authority country and year, then one row per cell.
bysort auth_country year : egen nb_bia = total(inv_nb_auth_ctries * bia)
keep year auth_country  nb_*
duplicates drop
sort auth_country year

save ${dataset_dir}/spillovers/ctry_auth_count_alltechs.dta, replace


* Average country shares (fallback for companies with no patents in the pre-sample period).
* For each count file: total the biadic counts over 1986-1995 per authority country,
* convert them to shares of the overall total, and store them as a single wide row
* with one column share_WD_total_bia_<country> per country.

foreach tk in auto95 alltechs {
	use  ${dataset_dir}/spillovers/ctry_auth_count_`tk'.dta, clear

	* Ten-year window ending in 1995; missing years and the 9999 code fall outside it.
	keep if inrange(year, 1995-9, 1995)
	collapse (sum) nb_*, by(auth_country)

	egen total_bia = total(nb_bia)
	gen share_WD_total_bia_ = nb_bia / total_bia
	keep auth_country share_WD_total_bia_

	* A constant id lets reshape place every country on the same row.
	gen x = 1
	reshape wide share_WD_total_bia_, i(x) j(auth_country) string
	drop x
	save ${dataset_dir}/weights/shares_auth_allctries_`tk'_1995.dta, replace
}


* Country-level invention stocks by technology.
* Build a rectangular country-by-year panel of all-technology counts, zero-filling
* the cells that fillin adds.
use ${dataset_dir}/spillovers/ctry_auth_count_alltechs.dta, clear
drop if year <= 1899
fillin auth_country year
rename nb_bia bia_alltechs
replace bia_alltechs = 0 if bia_alltechs == .
drop _fillin

* Add the technology counts and their complement (all technologies minus the technology).
foreach tk in auto95 {
	mmerge auth_country year using ${dataset_dir}/spillovers/ctry_auth_count_`tk'.dta, unmatched(master)
	rename nb_bia bia_`tk'
	replace bia_`tk' = 0 if bia_`tk' == .
	gen bia_N_`tk' = bia_alltechs - bia_`tk'
	replace bia_N_`tk' = 0 if bia_N_`tk' == .
}

drop _merge

* Stocks: for every float-typed variable, the sum of the current and the two preceding
* years within a country. The first two years of each country are left missing.
quietly ds, has(type float)
local floatvars `r(varlist)'
foreach series of local floatvars {
	bysort auth_country (year): gen k`series' = `series' + `series'[_n-1] + `series'[_n-2] if _n > 2
}
keep if year > 1969
compress

tempfile auth_stocks
save `auth_stocks', replace

* Stocks used in the spillovers (country stocks): keep the technology and
* non-technology stocks, and drop the all-technology one.
use `auth_stocks', clear
keep auth_country year kbia_*
drop *_alltechs

* A trailing underscore on each stock name separates it from the country code
* that reshape appends.
foreach stock of varlist kbia_* {
	rename `stock' `stock'_
}
reshape wide kbia_*, i(year) j(auth_country) string
compress
save ${dataset_dir}/spillovers/ctry_auth_stocks_wide_bia_recent_innovation.dta, replace


***********************************************
* Recent Innovations - Calculate Spillover Vars
***********************************************


clear
set maxvar 32767

* Home country by patent share: one row per firm.
* This requires that the indepvars.do file has been run BEFORE this file.
* NOTE(review): the input path is fixed to the from1970 / tfacit1 variant regardless
* of ${weight_window} and ${wtype} — confirm this is intended.
use BvD country_shr_1995 using ${dataset_dir}/indep_vars/bvd_year_country_multinational_from1970_tfacit1.dta, clear
bysort BvD : keep if _n == 1
compress
tempfile maxweight
save `maxweight', replace

* Patent weights, stored under the tempfile name that wtype == tfacit1 selects later.
use ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_1995_orbis2017_tfacit1.dta, clear
tempfile wtfacit1
save `wtfacit1'

* Inventor weights of countries (based on unique nationalities of patent applicants),
* renamed to the share2_all_1995_<country> names the patent weights use, and stored
* under the tempfile name that wtype == iw selects later.
use ${dataset_dir}/weights/bvdid_inventor_weights_all_from1970_1995_orbis2017.dta, clear
foreach cc of global invtcountrylist {
	rename share_invt_1995_`cc' share2_all_1995_`cc'
}
tempfile wiw
save `wiw'

* Country list matching the chosen weight type: clist resolves to clist_<wtype>.
global clist_iw $invtcountrylist 
global clist_tfacit1 $countrylist1995
global clist ${clist_${wtype}}

* Note: This program always uses the average inventor weight distribution if a firm has no inventor weight.
* This never matters in practice, because we only use firms that have spillover weights (which is usually the case).
* Since this is just a robustness check, I did not bother recomputing the average weight distribution for firms we never use.

* SPILLOVERS variables (weighted using patents since 1970)
* For each technology, builds firm-year spillover measures as weighted sums of the
* country-level three-year stocks, for the technology (xxx) and its complement (N_xxx):
*   - total, home-country and foreign components,
*   - "shr2" versions normalised by the home weight (dshare) and its complement,
*   - logs (suffix _a) with zero indicators (suffix _a0),
*   - "shr4" versions scaled by the firm's 1995 home/foreign term.
* Relies on the tempfiles maxweight and w<wtype>, and on the global clist, defined just above.

foreach xxx in auto95 {
    use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
	* Add country-level invention stocks
	mmerge year using ${dataset_dir}/spillovers/ctry_auth_stocks_wide_bia_recent_innovation.dta, unmatched(master) ukeep(kbia_`xxx'* kbia_N_`xxx'*)
	* Add the firm's home country (country_shr_1995)
	mmerge BvD using `maxweight', unmatched(master)
	* Add the firm-level weights for the chosen weight type; firms without a match are flagged
	mmerge BvD using `w${wtype}', unmatched(master) 
	gen missing_dspill_weights_1995=(_m==1)
	drop _m
	* average shares (for companies that have 0 patent pre-sample)
	cross using ${dataset_dir}/weights/shares_auth_allctries_alltechs_1995.dta
	
	gen dshare = .
	* Define the country weights, falling back to the average shares when the firm weight is missing.
	* The averages file is written earlier in this script with columns named
	* share_WD_total_bia_<country>. The previous code looked for share_WD_total_inv_<country>,
	* which that file never contains, so every fallback weight was silently set to 0.
	foreach ctry of global clist{
		noisily capture confirm variable share_WD_total_bia_`ctry'
		if _rc != 0 {
			* country absent from the averages file: fallback weight is 0
			gen share_WD_total_bia_`ctry' = 0
		}
		gen weight_`ctry' = share2_all_1995_`ctry'
		replace weight_`ctry' = share_WD_total_bia_`ctry' if weight_`ctry' == .
		* dshare is the weight of the firm's home country
		replace dshare = weight_`ctry' if country_shr_1995 =="`ctry'"
	}
	
	*check variables and generate weighted values
	sort BvD year
	foreach vv in `xxx' N_`xxx' {
		foreach ctry in $clist {
			* a country without a stock column contributes 0
			noisily capture confirm variable kbia_`vv'_`ctry'
			if _rc != 0 {
				gen kbia_`vv'_`ctry' = 0
			}
			 *generate weighted values of the biadic stocks
			gen kbia`vv'_wtd_`ctry' = weight_`ctry' * kbia_`vv'_`ctry'
		}
	}
	*generate summed up values
	foreach vv in `xxx' N_`xxx'{
		egen dspill_`vv'_1995_wtd = rowtotal(kbia`vv'_wtd_??), missing
		
		gen dspill_`vv'_h_shr_1995_wtd = .
		*set the home component from the firm's home country (country_shr_1995)
		foreach ctry of global clist{
			replace dspill_`vv'_h_shr_1995_wtd = kbia`vv'_wtd_`ctry' if country_shr_1995 =="`ctry'"
		}
		*set foreign share as complement
		gen dspill_`vv'_f_shr_1995_wtd = dspill_`vv'_1995_wtd - dspill_`vv'_h_shr_1995_wtd
		*normalize weights
		gen dspill_`vv'_h_shr2_1995_wtd = dspill_`vv'_h_shr_1995_wtd / dshare  
		gen dspill_`vv'_f_shr2_1995_wtd = dspill_`vv'_f_shr_1995_wtd / (1-dshare)	
	}
	drop *wtd_* share* *WD*


	*generate spillovers for auto and not auto patents:
	*_a0 flags a zero total, _a is its log, set to 0 where the total is zero
	gen dspill`xxx'_1995_a0 = dspill_`xxx'_1995_wtd==0
	gen dspillN`xxx'_1995_a0 = dspill_N_`xxx'_1995_wtd==0
	gen dspill`xxx'_1995_a = log(dspill_`xxx'_1995_wtd)
	gen dspillN`xxx'_1995_a = log(dspill_N_`xxx'_1995_wtd)
	replace dspill`xxx'_1995_a = 0 if dspill`xxx'_1995_a0 == 1
	replace dspillN`xxx'_1995_a  = 0 if dspillN`xxx'_1995_a0 == 1
	drop dspill_`xxx'_1995_wtd dspill_N_`xxx'_1995_wtd
	
	*generate logs of the foreign and home shares of the weight normalized spillovers
	foreach vy in f_shr f_shr2 h_shr h_shr2 {
		gen dspill`xxx'_`vy'_1995_a0 = dspill_`xxx'_`vy'_1995_wtd==0
		gen dspillN`xxx'_`vy'_1995_a0 = dspill_N_`xxx'_`vy'_1995_wtd==0
		gen dspill`xxx'_`vy'_1995_a = log(dspill_`xxx'_`vy'_1995_wtd)
		gen dspillN`xxx'_`vy'_1995_a = log(dspill_N_`xxx'_`vy'_1995_wtd)
		replace dspill`xxx'_`vy'_1995_a = 0 if dspill`xxx'_`vy'_1995_a0 == 1
		replace dspillN`xxx'_`vy'_1995_a = 0 if dspillN`xxx'_`vy'_1995_a0 == 1
		drop dspill_`xxx'_`vy'_1995_wtd dspill_N_`xxx'_`vy'_1995_wtd
	}
	
	drop kbia*
	drop weight_?? country_shr_1995

	*create  fully normalized spillover shares
	qui ds *_h_shr_1995_a
	foreach var in `r(varlist)' {
		* prefix up to the first underscore, e.g. dspill<tech> or dspillN<tech>
		local vv = substr("`var'",1,strpos("`var'","_")-1)
		gen `vv'_h_shr4_1995_a0 = `vv'_h_shr2_1995_a0
		gen `vv'_h_shr5_1995_a0 = `vv'_h_shr2_1995_a0 if year >= 1995 & year <=2009
		* home and foreign terms relative to the total, weighted by dshare and its complement
		gen term_home_var = exp(`vv'_h_shr2_1995_a) / exp(`vv'_1995_a) * dshare
		gen term_foreign_var = exp(`vv'_f_shr2_1995_a) / exp(`vv'_1995_a) * (1-dshare)
		* fix each term at its 1995 value and spread it over all years of the firm
		gen _term_home = term_home_var if year==1995	
		gen _term_foreign = term_foreign_var if year==1995
		bys BvD : egen term_home_fixed = max(_term_home)
		bys BvD : egen term_foreign_fixed = max(_term_foreign)
		gen `vv'_h_shr4_1995_a = `vv'_h_shr2_1995_a * term_home_fixed
		gen `vv'_f_shr4_1995_a = `vv'_f_shr2_1995_a * term_foreign_fixed
		drop term_* _term_*
	}

	drop dshare

	*labeling


	compress
	
	save ${final_dir}/bvd_year_dspillovers_`xxx'_bia_${wtype}_recent_innovation.dta, replace
}

}
* Report the outcome of the preceding capture block: _rc is nonzero when a command inside it failed.
if _rc != 0 {
    display "Execution finished with errors."
}
else {
    display "Execution finished successfully."
}

* Close the named log; capture keeps this quiet if the log is not open.
capture log close dat